In [1]:
import subprocess
from pathlib import Path
import duckdb
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook"

ROOT = Path.cwd()
DBT_DIR = ROOT.parent / "dbt"
DUCKDB_PATH = ROOT.parent / "ispra.duckdb"
In [2]:
# Grading: rank 1-20, A (top 4), B (5-8), C (9-12), D (13-16), F (17-20)
GRADE_COLORS = {"A": "#6BCB77", "B": "#A8D672", "C": "#D4A84B", "D": "#E07A5F", "F": "#C45A3C"}

def grade_from_rank(rank):
    if rank <= 4: return "A"
    if rank <= 8: return "B"
    if rank <= 12: return "C"
    if rank <= 16: return "D"
    return "F"

def add_grades(df, value_col, ascending=True):
    """Rank regions and assign grades. ascending=True means lower value = better."""
    df = df.sort_values(value_col, ascending=ascending).reset_index(drop=True)
    df["rank"] = range(1, len(df) + 1)
    df["grade"] = df["rank"].apply(grade_from_rank)
    df["color"] = df["grade"].map(GRADE_COLORS)
    return df
In [3]:
con = duckdb.connect(DUCKDB_PATH, read_only=True)

# Waste: 2024 snapshot + full time series
waste_2024 = con.sql("""
    SELECT regione, recycling_rate_pct, waste_per_capita_kg
    FROM fct_waste_by_region WHERE anno = 2024
""").fetchdf()

waste_ts = con.sql("""
    SELECT anno, regione, recycling_rate_pct
    FROM fct_waste_by_region ORDER BY anno, regione
""").fetchdf()

# Land: aggregate comuni to regions
land = con.sql("""
    SELECT regione,
           ROUND(AVG(consumed_soil_2024_pct), 2) AS avg_consumed_pct,
           ROUND(SUM(total_net_increment_2006_2024_ha), 1) AS total_growth_ha,
           ROUND(SUM(net_increment_last_3y_ha), 1) AS recent_growth_ha
    FROM fct_land_consumption_ranking
    GROUP BY regione
""").fetchdf()

# Air: pivot NO2 and PM10 into columns, compute combined index
air = con.sql("""
    SELECT regione,
           MAX(CASE WHEN pollutant = 'NO2' THEN avg_mean END) AS no2,
           MAX(CASE WHEN pollutant = 'PM10' THEN avg_mean END) AS pm10,
           MAX(CASE WHEN pollutant = 'PM2.5' THEN avg_mean END) AS pm25,
           ROUND((MAX(CASE WHEN pollutant = 'NO2' THEN avg_mean END)
                + MAX(CASE WHEN pollutant = 'PM10' THEN avg_mean END)) / 2, 1) AS pollution_index
    FROM fct_air_quality_by_region
    WHERE anno = 2022
    GROUP BY regione
""").fetchdf()

# Normalize Friuli name
air["regione"] = air["regione"].str.replace("Friuli Venezia Giulia", "Friuli-Venezia Giulia")

con.close()
print(f"Waste: {len(waste_2024)} regions, Land: {len(land)} regions, Air: {len(air)} regions")
Waste: 20 regions, Land: 20 regions, Air: 20 regions

1. Raccolta Differenziata (Recycling Rate) — 2024¶

Italy's national target is 65%. How do the 20 regions compare?

In [4]:
w = add_grades(waste_2024, "recycling_rate_pct", ascending=False)  # higher = better

fig = px.bar(
    w, x="recycling_rate_pct", y="regione", orientation="h",
    color="grade", color_discrete_map=GRADE_COLORS,
    category_orders={"regione": w["regione"].tolist()},
    text="recycling_rate_pct",
    labels={"recycling_rate_pct": "Recycling Rate (%)", "regione": ""},
    title="Recycling Rate by Region (2024)",
)
fig.add_vline(x=65, line_dash="dash", line_color="white", opacity=0.4,
              annotation_text="65% national target", annotation_position="top")
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside", textfont_size=10)
fig.update_layout(
    template="plotly_dark", height=600, showlegend=True,
    legend_title_text="Grade", yaxis=dict(autorange="reversed"),
    margin=dict(l=160),
)
fig.show()

Recycling over time: convergence or divergence?¶

15 years of data (2010–2024) show whether regions are catching up or falling further behind.

In [5]:
# Highlight top 3, bottom 3, and national average
top3 = w.head(3)["regione"].tolist()
bot3 = w.tail(3)["regione"].tolist()
highlight = top3 + bot3

ts_highlight = waste_ts[waste_ts["regione"].isin(highlight)]
ts_avg = waste_ts.groupby("anno")["recycling_rate_pct"].mean().reset_index()
ts_avg["regione"] = "Italy (avg)"

fig = px.line(
    ts_highlight, x="anno", y="recycling_rate_pct", color="regione",
    labels={"anno": "", "recycling_rate_pct": "Recycling Rate (%)", "regione": ""},
    title="Recycling Rate 2010–2024: Best vs Worst Regions",
)
fig.add_scatter(x=ts_avg["anno"], y=ts_avg["recycling_rate_pct"],
                mode="lines", name="Italy (avg)",
                line=dict(color="white", dash="dot", width=2))
fig.add_hline(y=65, line_dash="dash", line_color="rgba(255,255,255,0.3)",
              annotation_text="65% target")
fig.update_layout(template="plotly_dark", height=450)
fig.show()

More waste ≠ better recycling¶

Do regions that generate more waste per capita also recycle more? Or is high waste generation a sign of consumption excess?

In [6]:
fig = px.scatter(
    w, x="waste_per_capita_kg", y="recycling_rate_pct",
    color="grade", color_discrete_map=GRADE_COLORS,
    text="regione", size_max=12,
    labels={"waste_per_capita_kg": "Waste per Capita (kg)", "recycling_rate_pct": "Recycling Rate (%)"},
    title="Waste Generation vs Recycling Rate (2024)",
)
fig.update_traces(textposition="top center", textfont_size=9)
fig.add_hline(y=65, line_dash="dash", line_color="rgba(255,255,255,0.3)")
fig.update_layout(template="plotly_dark", height=500, showlegend=False)
fig.show()

2. Consumo di Suolo (Land Consumption) — 2024¶

How much of each region's territory has been paved over? And how fast is it still growing?

In [7]:
l = add_grades(land, "avg_consumed_pct", ascending=True)  # lower = better

fig = px.bar(
    l, x="avg_consumed_pct", y="regione", orientation="h",
    color="grade", color_discrete_map=GRADE_COLORS,
    category_orders={"regione": l["regione"].tolist()},
    text="avg_consumed_pct",
    labels={"avg_consumed_pct": "Average Consumed Soil (%)", "regione": ""},
    title="Land Consumption by Region (2024)",
)
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside", textfont_size=10)
fig.update_layout(
    template="plotly_dark", height=600, showlegend=True,
    legend_title_text="Grade", yaxis=dict(autorange="reversed"),
    margin=dict(l=160),
)
fig.show()

Where is growth happening fastest?¶

Total hectares consumed since 2006 vs recent growth (last 3 years). Are the biggest consumers still accelerating?

In [8]:
fig = px.scatter(
    l, x="total_growth_ha", y="recent_growth_ha",
    color="grade", color_discrete_map=GRADE_COLORS,
    text="regione",
    labels={"total_growth_ha": "Total Growth Since 2006 (ha)",
            "recent_growth_ha": "Growth Last 3 Years (ha)"},
    title="Land Consumption: Historical vs Recent Growth",
)
fig.update_traces(textposition="top center", textfont_size=9)
fig.update_layout(template="plotly_dark", height=500, showlegend=False)
fig.show()

3. Qualità dell'Aria (Air Quality) — 2022¶

NO₂ and PM10 concentrations across regions. The Po Valley's geography traps pollutants — does the data confirm it?

In [9]:
a = add_grades(air, "pollution_index", ascending=True)  # lower = better

fig = px.bar(
    a, x="pollution_index", y="regione", orientation="h",
    color="grade", color_discrete_map=GRADE_COLORS,
    category_orders={"regione": a["regione"].tolist()},
    text="pollution_index",
    labels={"pollution_index": "Pollution Index (avg NO₂ + PM10 / 2)", "regione": ""},
    title="Air Quality by Region — Combined Pollution Index (2022)",
)
fig.update_traces(texttemplate="%{text:.1f}", textposition="outside", textfont_size=10)
fig.update_layout(
    template="plotly_dark", height=600, showlegend=True,
    legend_title_text="Grade", yaxis=dict(autorange="reversed"),
    margin=dict(l=160),
)
fig.show()

NO₂ vs PM10: different pollutants, different stories¶

NO₂ comes mainly from traffic. PM10 from heating, agriculture, and industry. Which regions suffer from which?

In [10]:
fig = px.scatter(
    a, x="no2", y="pm10", color="grade", color_discrete_map=GRADE_COLORS,
    text="regione",
    labels={"no2": "NO₂ mean (µg/m³)", "pm10": "PM10 mean (µg/m³)"},
    title="NO₂ vs PM10 by Region (2022)",
)
fig.update_traces(textposition="top center", textfont_size=9)
# WHO guidelines
fig.add_hline(y=15, line_dash="dot", line_color="rgba(107,203,119,0.4)",
              annotation_text="WHO PM10 guideline (15)")
fig.add_vline(x=10, line_dash="dot", line_color="rgba(107,203,119,0.4)",
              annotation_text="WHO NO₂ guideline (10)")
fig.update_layout(template="plotly_dark", height=500, showlegend=False)
fig.show()

4. Pagella Finale — Overall Environmental Report Card¶

Each region ranked 1–20 per metric. Grades: A (top 4), B (5–8), C (9–12), D (13–16), F (17–20). Overall grade from average rank.

In [11]:
# Build report card from the three graded DataFrames
report = w[["regione", "rank", "grade"]].rename(columns={"rank": "waste_rank", "grade": "waste_grade"})
report = report.merge(
    l[["regione", "rank", "grade"]].rename(columns={"rank": "land_rank", "grade": "land_grade"}),
    on="regione")
report = report.merge(
    a[["regione", "rank", "grade"]].rename(columns={"rank": "air_rank", "grade": "air_grade"}),
    on="regione")

report["avg_rank"] = (report["waste_rank"] + report["land_rank"] + report["air_rank"]) / 3
report = report.sort_values("avg_rank").reset_index(drop=True)
report["overall_rank"] = range(1, 21)
report["overall_grade"] = report["overall_rank"].apply(grade_from_rank)

report[["overall_rank", "regione", "waste_grade", "land_grade", "air_grade", "overall_grade", "avg_rank"]]
Out[11]:
overall_rank regione waste_grade land_grade air_grade overall_grade avg_rank
0 1 Sardegna A A A A 3.000000
1 2 Basilicata C A A A 4.666667
2 3 Valle d'Aosta B A B A 5.666667
3 4 Umbria C B A A 6.333333
4 5 Trentino-Alto Adige A B D B 7.333333
5 6 Abruzzo D B B B 8.666667
6 7 Calabria F B A B 9.333333
7 8 Marche B C C B 10.000000
8 9 Emilia-Romagna A D D C 10.333333
9 10 Friuli-Venezia Giulia B D C C 10.666667
10 11 Molise D A F C 11.000000
11 12 Toscana C C C C 11.333333
12 13 Piemonte C C D D 12.000000
13 14 Liguria D D C D 12.666667
14 15 Veneto A F F D 12.666667
15 16 Puglia D F B D 13.000000
16 17 Sicilia F D B F 14.000000
17 18 Lazio F C D F 14.000000
18 19 Lombardia B F F F 15.000000
19 20 Campania F F F F 18.333333
In [12]:
# Heatmap of grades across all metrics
grade_to_num = {"A": 5, "B": 4, "C": 3, "D": 2, "F": 1}
heatmap_data = report.set_index("regione")[["waste_grade", "land_grade", "air_grade", "overall_grade"]]
heatmap_num = heatmap_data.replace(grade_to_num)

fig = go.Figure(data=go.Heatmap(
    z=heatmap_num.values,
    x=["Rifiuti", "Suolo", "Aria", "Overall"],
    y=heatmap_data.index.tolist(),
    text=heatmap_data.values,
    texttemplate="%{text}",
    textfont={"size": 13, "color": "#1A1714"},
    colorscale=[[0, "#C45A3C"], [0.25, "#E07A5F"], [0.5, "#D4A84B"], [0.75, "#A8D672"], [1, "#6BCB77"]],
    showscale=False,
    hovertemplate="Region: %{y}<br>Metric: %{x}<br>Grade: %{text}<extra></extra>",
))
fig.update_layout(
    template="plotly_dark", height=700,
    title="Environmental Report Card — All Regions",
    yaxis=dict(autorange="reversed"),
    margin=dict(l=180),
)
fig.show()